Recognizing Cats and Dogs with Python and TensorFlow

Environment Preparation

1. A Python environment; Python 3.8 is recommended.
2. Anaconda
Anaconda installation:
https://www.anaconda.com/products/individual#Downloads
Download the Anaconda build for your platform; the installation process itself is not covered here.
For an installation walkthrough, see: https://blog.csdn.net/qq_43674360/article/details/123396415

Environment Setup

Create a virtual environment

conda create -n tf python=3.8
conda activate tf

Install TensorFlow

pip install tensorflow -i https://pypi.tuna.tsinghua.edu.cn/simple
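An optional quick check that TensorFlow imports correctly inside the tf environment (the printed version depends on what pip resolved):

python -c "import tensorflow as tf; print(tf.__version__)"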

Dataset Preparation

The dataset can be downloaded from:
https://www.kaggle.com/c/dogs-vs-cats/data
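If you have the Kaggle CLI installed and your API token configured (and have accepted the competition rules), the same data can also be fetched from the command line; downloading from the web page works just as well:

kaggle competitions download -c dogs-vs-cats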

After downloading, the directory layout is as follows:

├─dogs-vs-cats
│ ├─test1.zip
│ ├─train.zip
│ └─sampleSubmission.csv
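The scripts below expect the images to be extracted on disk, e.g. ./dogs-vs-cats/train/cat.0.jpg. A minimal sketch for unpacking the archives with Python's standard library, assuming the layout above:

import zipfile

# Extract train.zip and test1.zip next to the archives,
# producing ./dogs-vs-cats/train and ./dogs-vs-cats/test1
for name in ['train.zip', 'test1.zip']:
    with zipfile.ZipFile('./dogs-vs-cats/' + name) as zf:
        zf.extractall('./dogs-vs-cats')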

Dataset Processing

Organize the training set into a structured form.
data_read.py

import os
import random

import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.utils import load_img

filenames = os.listdir('./dogs-vs-cats/train')

# Extract the animal type from each filename (e.g. "cat.0.jpg" -> "cat")
categories = []
for filename in filenames:
    category = filename.split('.')[0]
    categories.append(category)

# Put filenames and labels into a structured DataFrame
df = pd.DataFrame({
    'filename': filenames,
    'category': categories
})

# Inspect the structured result
# print(df.head())
# print(df.tail())
# print(df['category'].value_counts())
# df['category'].value_counts().plot(kind='bar')
# plt.show()

# Display a random sample image
sample = random.choice(filenames)
img_keras = load_img('./dogs-vs-cats/train/' + sample)
# plt.imshow(img_keras)
# plt.show()

from sklearn.model_selection import train_test_split

# Split the labelled data into training and validation sets (80/20)
train_df, validate_df = train_test_split(df, test_size=0.20, random_state=42)

train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)

# print(train_df.head())
# print(validate_df.head())
print(train_df)
# print(validate_df)

total_train = train_df.shape[0]
total_validate = validate_df.shape[0]

print("Total number of examples in training dataset : {0}".format(total_train))
print("Total number of examples in validation dataset : {0}".format(total_validate))

Run the script to verify that the dataset is read correctly. With the full Kaggle training set of 25,000 images and the 80/20 split above, you should see 20,000 examples in the training split and 5,000 in the validation split.

Create the Model

The code to create a model is as follows:
create_model.py

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, BatchNormalization, Flatten, Dropout
from tensorflow.keras import optimizers


## Build the first model
class Model:
    def __init__(self, IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS):
        self.IMG_WIDTH = IMG_WIDTH
        self.IMG_HEIGHT = IMG_HEIGHT
        self.IMG_CHANNELS = IMG_CHANNELS

    def create_model(self):
        model = Sequential()

        # Block 1
        # 2D convolution: 32 output filters, a 3x3 kernel, 150x150 RGB input
        model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform',
                         padding='same',
                         input_shape=(self.IMG_WIDTH, self.IMG_HEIGHT, self.IMG_CHANNELS)))
        # 2x2 max pooling halves the spatial dimensions
        model.add(MaxPooling2D((2, 2)))

        # Block 2
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2)))

        # Block 3
        model.add(Conv2D(128, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2)))

        # Block 4
        model.add(Conv2D(128, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2)))

        # Flatten the multi-dimensional feature maps into a single vector
        model.add(Flatten())
        # Fully connected layers; the final sigmoid unit outputs the cat/dog probability
        model.add(Dense(512, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))

        # Set the loss function, optimizer and metrics
        model.compile(loss='binary_crossentropy',
                      optimizer=optimizers.RMSprop(learning_rate=1e-4),
                      metrics=['acc'])
        return model

Note that create_model.py only defines the network; the trained model file (model_1.h5) is written during the training step below. If you want a model file on disk immediately after running create_model.py, see the sketch after this paragraph.
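A minimal sketch, assuming you want create_model.py to save an untrained copy of the model when executed directly (the my_model1.h5 file name is illustrative):

# Append to create_model.py: save an untrained copy when run as a script
if __name__ == '__main__':
    model_1 = Model(150, 150, 3).create_model()
    model_1.summary()
    model_1.save('my_model1.h5')  # illustrative name; the training step saves model_1.h5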

Train the Model

from data_read import train_df, validate_df
from create_model import Model

import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Initialize the model
IMG_WIDTH = 150
IMG_HEIGHT = 150
IMG_CHANNELS = 3

model = Model(IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS)
model_1 = model.create_model()
model_1.summary()

# Rescale pixel values from [0, 255] down to [0, 1]
train_imgdatagen = ImageDataGenerator(rescale=1./255)
valid_imgdatagen = ImageDataGenerator(rescale=1./255)

train_generator_m1 = train_imgdatagen.flow_from_dataframe(
    train_df,
    directory="./dogs-vs-cats/train",
    x_col='filename',
    y_col='category',
    target_size=(150, 150),  # resize images to 150x150
    batch_size=64,
    class_mode='binary'
)

# The validation filenames also live in the train folder,
# because validate_df was split off from the training DataFrame.
validation_generator_m1 = valid_imgdatagen.flow_from_dataframe(
    validate_df,
    directory="./dogs-vs-cats/train",
    x_col='filename',
    y_col='category',
    target_size=(150, 150),  # resize images to 150x150
    batch_size=64,
    class_mode='binary'
)

# Train model 1
history_1 = model_1.fit(
    train_generator_m1,
    epochs=3,
    steps_per_epoch=100,
    validation_data=validation_generator_m1,
    validation_steps=50
)

# Save the trained model
model_1.save('./model_1.h5')

# Print the training results
print(np.mean(history_1.history['acc']))
print(np.mean(history_1.history['val_acc']))

# Plot the training curves
# import matplotlib.pyplot as plt
# plt.plot(history_1.history['acc'], color='black')
# plt.plot(history_1.history['val_acc'], color='blue')
# plt.title('Training and validation accuracy of model 1')
# plt.xlabel('Epochs')
# plt.ylabel('Accuracy')
# plt.show()
# plt.plot(history_1.history['loss'], color='black')
# plt.plot(history_1.history['val_loss'], color='blue')
# plt.title('Training and validation loss of model 1')
# plt.xlabel('Epochs')
# plt.ylabel('Loss')
# plt.show()
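With 20,000 training images and a batch size of 64, one full pass over the data is about 313 batches, so steps_per_epoch = 100 only shows the model part of the split each epoch. A small sketch, assuming the generators defined above, that derives the step counts from the generators instead of hard-coding them:

# Each Keras image iterator reports how many batches it yields per pass
steps_per_epoch = len(train_generator_m1)        # ceil(20000 / 64) = 313 with the split above
validation_steps = len(validation_generator_m1)  # ceil(5000 / 64) = 79

history_1 = model_1.fit(
    train_generator_m1,
    epochs=3,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_generator_m1,
    validation_steps=validation_steps
)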

Validate the Model

Validation code:


import os

import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model

# Load the model saved after training
model = load_model('model_1.h5')

# Build a DataFrame of test image filenames
test_filenames = os.listdir('./dogs-vs-cats/test2cat')

test_df = pd.DataFrame(
    {
        'id': test_filenames
    }
)
# print(test_df)

test_gen = ImageDataGenerator(rescale=1./255)

# To predict on the official Kaggle test set instead, point directory to "./dogs-vs-cats/test1".
test_generator = test_gen.flow_from_dataframe(
    test_df,
    directory="./dogs-vs-cats/test2cat",
    x_col='id',
    y_col=None,
    target_size=(150, 150),  # resize images to 150x150
    batch_size=1,
    class_mode=None,
    shuffle=False,
    validate_filenames=False
)
print(test_generator)

# Predict: the sigmoid output is a probability, thresholded at 0.5
predictions = model.predict(test_generator)
print(predictions)
pred = [1 if p > 0.5 else 0 for p in predictions.ravel()]

test_df['category'] = pred

# Plot the distribution of predicted classes
test_df['category'].value_counts().plot.bar()
plt.show()
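The predictions are 0/1 class indices, not label names. Which index means cat and which means dog is defined by the class_indices of the training generator (flow_from_dataframe sorts classes alphabetically, so normally cat = 0 and dog = 1). A minimal sketch for mapping indices back to labels, assuming access to train_generator_m1 from the training script:

# Invert the training generator's mapping, e.g. {'cat': 0, 'dog': 1} -> {0: 'cat', 1: 'dog'}
label_map = {index: label for label, index in train_generator_m1.class_indices.items()}

test_df['label'] = test_df['category'].map(label_map)
print(test_df.head())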

Life is short; it is enough to work hard at two things:
the first is to love life and to love the people around you;
the second is to keep learning, to achieve something meaningful in your work and realize your own value, rather than working only for money.

继开 (WeChat)
Feel free to add me on WeChat to discuss technology together.